###### LOADING LIBRARIES #####
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.0     ✓ dplyr   1.0.5
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(reshape2)
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
library(tidyr)
library(effects)
## Warning: package 'effects' was built under R version 4.0.5
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.0.5
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
library(ggplot2)

###### SET WORK SPACE AND READ IN FILE ######
setwd("~/Documents/PhD 2020/BeHo:ZiBA study/Data analysis/Tube test data analysis")
tube_test_wins = read.csv("R_input_files/Mice-full-overview.csv", header = TRUE)


###### TRANSFORM DATAFRAME FOR STATISTICS #######
#Placing NAs for dead mice (C8 mouse 3065, C16 mouse 3183, C7 mouse 2730, C3 mouse 3083, C1 mouse 4176)
tube_test_wins[76, c(32:45,48:49)] = NA
tube_test_wins[38, c(32:45,48:49)] = NA
tube_test_wins[32, c(32:45,48:49)] = NA
tube_test_wins[14, c(32:45,48:49)] = NA
tube_test_wins[4,c(28:45,47:49)] = NA

#Removing irrelevant columns
tube_test_80 <- tube_test_wins [,-c(5,8,10,15:17,21:23, 27, 31, 35:37, 39, 42:50)]

#Renaming columns without the many dots
names(tube_test_80)[2] <- "Arrive_weight"
names(tube_test_80)[5] <- "Sex"
names(tube_test_80)[6] <- "Exp_group"

#Transform the dataframe from a wide to a long format
tube_test_80 <- melt(tube_test_80, id.vars = c("Name","Arrive_weight", "Phenotype", "Crate", "Sex", "Exp_group","Cage.ID"))

#Renaming the columns created by melt()
names(tube_test_80)[8] <- "Treatments"
names(tube_test_80)[9] <- "Wins_per_rep"
names(tube_test_80)[7] <- "Cage_ID"
names(tube_test_80)[4] <- "Crate_ID"

#Add a new column with the replicate info and then rename the rows of column Treatments to only the treatments 
Replicates <- c(rep("R1",80), rep("R2",80), rep("R3",80),rep("R4",80),rep(c(rep("R1",80), rep("R2",80), rep("R3",80)),4),rep("R3",80), rep("R4",80), rep("R5",80))
#add the column "Replicates" to the dataframe tube_test_80
tube_test_80 <- cbind(tube_test_80,Replicates)
#Changing the name of the factors created after transforming the dataframe. There are 19 in total corresponding to the no of reps in total.
levels(tube_test_80$Treatments) <- c("OPT","OPT","OPT","OPT","HEAT","HEAT","HEAT","COLD","COLD","COLD","DIET","DIET","DIET","ANTI","ANTI","ANTI","FMT","FMT","FMT")
levels(tube_test_80$Exp_group) <- c("Treatment", "Control") #does the glm or lm model then know how to differ from the experimental and control animals tube test wins? Ask A & O this.

#Move the no of wins column (all of it) to the first column
colnames(tube_test_80)
##  [1] "Name"          "Arrive_weight" "Phenotype"     "Crate_ID"     
##  [5] "Sex"           "Exp_group"     "Cage_ID"       "Treatments"   
##  [9] "Wins_per_rep"  "Replicates"
tube_test_80 <- tube_test_80[, c(9,8,10,1:7)]

#Creating new factors to start statistics. A factor instead of num, chr or int account for variation between the "catagories"
tube_test_80 <- transform(tube_test_80,Replicates=as.factor(Replicates),
                            Phenotype=as.factor(Phenotype),
                            Crate=as.factor(Crate_ID),
                            Sex=as.factor(Sex),
                            Exp_group=as.factor(Exp_group),
                            Cage_ID=as.factor(Cage_ID))
str(tube_test_80)
## 'data.frame':    1520 obs. of  11 variables:
##  $ Wins_per_rep : int  2 3 0 4 1 4 1 2 3 0 ...
##  $ Treatments   : Factor w/ 6 levels "OPT","HEAT","COLD",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Replicates   : Factor w/ 5 levels "R1","R2","R3",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Name         : chr  "C01M3_4215" "C01M1_3942" "C01M2_2472" "C01M5_4176" ...
##  $ Arrive_weight: num  19 19.3 18.4 21.8 24 ...
##  $ Phenotype    : Factor w/ 6 levels "Black","Brown",..: 1 2 1 5 6 1 2 2 2 6 ...
##  $ Crate_ID     : chr  "Crate1" "Crate1" "Crate3" "Crate3" ...
##  $ Sex          : Factor w/ 2 levels "Female","Male": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Exp_group    : Factor w/ 2 levels "Control","Treatment": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Cage_ID      : Factor w/ 16 levels "01M","02M","03M",..: 1 1 1 1 1 2 2 2 2 2 ...
##  $ Crate        : Factor w/ 8 levels "Crate1","Crate2",..: 1 1 3 3 3 3 2 2 3 1 ...
#Remove mice that are not among the 40 I'm working with:
#I have mice from cage 3-6 (males) and 10-13 (females)
cages_to_remove <- c("01M","02M","07M","08M","09F","14F","15F","16F")
condition <- tube_test_80$Cage_ID %in% cages_to_remove
tube_test_40 <- tube_test_80[!condition, ]
# Remove rows with missing values (NA) in any column if needing the df like this later
cleaned_tube_test_40 <- drop_na(tube_test_40)

str(tube_test_40)
## 'data.frame':    760 obs. of  11 variables:
##  $ Wins_per_rep : int  3 0 3 1 3 0 2 4 1 3 ...
##  $ Treatments   : Factor w/ 6 levels "OPT","HEAT","COLD",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Replicates   : Factor w/ 5 levels "R1","R2","R3",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Name         : chr  "C03M2_1576" "C03M1_3856" "C03M3_4011" "C03M4_3083" ...
##  $ Arrive_weight: num  22.2 21 23.9 19 22.6 ...
##  $ Phenotype    : Factor w/ 6 levels "Black","Brown",..: 2 2 6 6 6 6 6 1 2 6 ...
##  $ Crate_ID     : chr  "Crate5" "Crate5" "Crate1" "Crate3" ...
##  $ Sex          : Factor w/ 2 levels "Female","Male": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Exp_group    : Factor w/ 2 levels "Control","Treatment": 2 2 2 2 2 1 1 1 1 1 ...
##  $ Cage_ID      : Factor w/ 16 levels "01M","02M","03M",..: 3 3 3 3 3 4 4 4 4 4 ...
##  $ Crate        : Factor w/ 8 levels "Crate1","Crate2",..: 5 5 1 3 2 2 1 5 2 5 ...
######## QUICK VISUALIZATION OF MY DATA (THE DEPENDENT VARIABLE) ##### 
hist(tube_test_80$Wins_per_rep, breaks=seq(0,4,by=0.2), ylim = c(0,400), xlab="Dominance rank", main="Histogram showing frequency of wins for all tube tests for all 80 mice")

hist(tube_test_40$Wins_per_rep, breaks=seq(0,4,by=0.2), ylim = c(0,200),xlab="Dominance rank", main="Histogram showing frequency of wins for all tube tests for the 40 specific mice")

######## STATISTICS - using glm (Generalized Linear Models) and lm (Multiple Linear Regression Models) ####
glimpse(tube_test_40) 
## Rows: 760
## Columns: 11
## $ Wins_per_rep  <int> 3, 0, 3, 1, 3, 0, 2, 4, 1, 3, 3, 1, 2, 4, 0, 1, 2, 4, 0,…
## $ Treatments    <fct> OPT, OPT, OPT, OPT, OPT, OPT, OPT, OPT, OPT, OPT, OPT, O…
## $ Replicates    <fct> R1, R1, R1, R1, R1, R1, R1, R1, R1, R1, R1, R1, R1, R1, …
## $ Name          <chr> "C03M2_1576", "C03M1_3856", "C03M3_4011", "C03M4_3083", …
## $ Arrive_weight <dbl> 22.23, 20.96, 23.90, 19.02, 22.55, 21.66, 16.23, 19.66, …
## $ Phenotype     <fct> Brown, Brown, White, White, White, White, White, Black, …
## $ Crate_ID      <chr> "Crate5", "Crate5", "Crate1", "Crate3", "Crate2", "Crate…
## $ Sex           <fct> Male, Male, Male, Male, Male, Male, Male, Male, Male, Ma…
## $ Exp_group     <fct> Treatment, Treatment, Treatment, Treatment, Treatment, C…
## $ Cage_ID       <fct> 03M, 03M, 03M, 03M, 03M, 04M, 04M, 04M, 04M, 04M, 05M, 0…
## $ Crate         <fct> Crate5, Crate5, Crate1, Crate3, Crate2, Crate2, Crate1, …
glimpse(tube_test_80)
## Rows: 1,520
## Columns: 11
## $ Wins_per_rep  <int> 2, 3, 0, 4, 1, 4, 1, 2, 3, 0, 3, 0, 3, 1, 3, 0, 2, 4, 1,…
## $ Treatments    <fct> OPT, OPT, OPT, OPT, OPT, OPT, OPT, OPT, OPT, OPT, OPT, O…
## $ Replicates    <fct> R1, R1, R1, R1, R1, R1, R1, R1, R1, R1, R1, R1, R1, R1, …
## $ Name          <chr> "C01M3_4215", "C01M1_3942", "C01M2_2472", "C01M5_4176", …
## $ Arrive_weight <dbl> 19.01, 19.28, 18.44, 21.80, 24.00, 20.20, 17.54, 20.78, …
## $ Phenotype     <fct> Black, Brown, Black, LIGHT BROWN, White, Black, Brown, B…
## $ Crate_ID      <chr> "Crate1", "Crate1", "Crate3", "Crate3", "Crate3", "Crate…
## $ Sex           <fct> Male, Male, Male, Male, Male, Male, Male, Male, Male, Ma…
## $ Exp_group     <fct> Treatment, Treatment, Treatment, Treatment, Treatment, T…
## $ Cage_ID       <fct> 01M, 01M, 01M, 01M, 01M, 02M, 02M, 02M, 02M, 02M, 03M, 0…
## $ Crate         <fct> Crate1, Crate1, Crate3, Crate3, Crate3, Crate3, Crate2, …
### lm (Multiple Linear Regression Models)
#ALL 80 MICE
#Because the Mice IDs are not of relevance here, we exclude the Name column, but test all other variables
lm_result_all <- lm(Wins_per_rep ~ .-Name,data=tube_test_80)
summary(lm_result_all)
## 
## Call:
## lm(formula = Wins_per_rep ~ . - Name, data = tube_test_80)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.7766 -0.9772 -0.1031  0.9403  3.0461 
## 
## Coefficients: (10 not defined because of singularities)
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           2.575308   0.427080   6.030 2.08e-09 ***
## TreatmentsHEAT        0.018433   0.110400   0.167 0.867418    
## TreatmentsCOLD        0.051767   0.110400   0.469 0.639212    
## TreatmentsDIET        0.038120   0.110802   0.344 0.730866    
## TreatmentsANTI       -0.109995   0.112421  -0.978 0.328027    
## TreatmentsFMT        -0.120770   0.130041  -0.929 0.353194    
## ReplicatesR2          0.025381   0.088230   0.288 0.773643    
## ReplicatesR3          0.004000   0.086977   0.046 0.963325    
## ReplicatesR4          0.046027   0.140104   0.329 0.742566    
## ReplicatesR5          0.025013   0.200248   0.125 0.900610    
## Arrive_weight         0.002154   0.016420   0.131 0.895642    
## PhenotypeBrown       -1.044612   0.104533  -9.993  < 2e-16 ***
## Phenotypecaramel      0.237762   0.340617   0.698 0.485269    
## Phenotypedark brown   1.383046   0.337843   4.094 4.48e-05 ***
## PhenotypeLIGHT BROWN -0.472570   0.211802  -2.231 0.025821 *  
## PhenotypeWhite       -1.043615   0.108200  -9.645  < 2e-16 ***
## Crate_IDCrate2       -0.370384   0.139157  -2.662 0.007862 ** 
## Crate_IDCrate3       -0.309293   0.155515  -1.989 0.046908 *  
## Crate_IDCrate4       -0.054754   0.240489  -0.228 0.819928    
## Crate_IDCrate5        0.269836   0.146165   1.846 0.065081 .  
## Crate_IDCrate6        0.262688   0.235866   1.114 0.265586    
## Crate_IDCrate7       -0.388380   0.237541  -1.635 0.102266    
## Crate_IDCrate8        0.667842   0.246728   2.707 0.006873 ** 
## SexMale                     NA         NA      NA       NA    
## Exp_groupTreatment   -0.313367   0.198050  -1.582 0.113809    
## Cage_ID02M            0.722391   0.193997   3.724 0.000204 ***
## Cage_ID03M            0.585602   0.210658   2.780 0.005508 ** 
## Cage_ID04M            0.264191   0.285637   0.925 0.355162    
## Cage_ID05M            0.120988   0.201548   0.600 0.548405    
## Cage_ID06M           -0.222318   0.202192  -1.100 0.271717    
## Cage_ID07M            0.007626   0.202175   0.038 0.969918    
## Cage_ID08M            0.785715   0.216479   3.630 0.000294 ***
## Cage_ID09F           -0.450626   0.214372  -2.102 0.035719 *  
## Cage_ID10F            0.743322   0.201076   3.697 0.000227 ***
## Cage_ID11F           -0.054298   0.203884  -0.266 0.790033    
## Cage_ID12F            0.361630   0.204612   1.767 0.077372 .  
## Cage_ID13F                  NA         NA      NA       NA    
## Cage_ID14F            0.455238   0.201021   2.265 0.023683 *  
## Cage_ID15F            0.803178   0.205041   3.917 9.38e-05 ***
## Cage_ID16F                  NA         NA      NA       NA    
## CrateCrate2                 NA         NA      NA       NA    
## CrateCrate3                 NA         NA      NA       NA    
## CrateCrate4                 NA         NA      NA       NA    
## CrateCrate5                 NA         NA      NA       NA    
## CrateCrate6                 NA         NA      NA       NA    
## CrateCrate7                 NA         NA      NA       NA    
## CrateCrate8                 NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.238 on 1450 degrees of freedom
##   (33 observations deleted due to missingness)
## Multiple R-squared:  0.1691, Adjusted R-squared:  0.1485 
## F-statistic: 8.197 on 36 and 1450 DF,  p-value: < 2.2e-16
plot(lm_result_all)

#SPCEIFIC 40 MICE ONLY
lm_result_all <- lm(Wins_per_rep ~ .-Name,data=tube_test_40)
summary(lm_result_all)
## 
## Call:
## lm(formula = Wins_per_rep ~ . - Name, data = tube_test_40)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.43206 -0.95705 -0.04025  0.87291  2.86905 
## 
## Coefficients: (10 not defined because of singularities)
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           3.618482   0.694919   5.207 2.50e-07 ***
## TreatmentsHEAT        0.014868   0.154077   0.096  0.92315    
## TreatmentsCOLD        0.006535   0.154077   0.042  0.96618    
## TreatmentsDIET        0.014868   0.154077   0.096  0.92315    
## TreatmentsANTI       -0.070838   0.155259  -0.456  0.64834    
## TreatmentsFMT        -0.058325   0.179371  -0.325  0.74515    
## ReplicatesR2         -0.005025   0.122585  -0.041  0.96731    
## ReplicatesR3         -0.003941   0.120822  -0.033  0.97399    
## ReplicatesR4         -0.018515   0.194456  -0.095  0.92417    
## ReplicatesR5          0.001593   0.275172   0.006  0.99538    
## Arrive_weight        -0.023974   0.026451  -0.906  0.36504    
## PhenotypeBrown       -1.331121   0.139145  -9.566  < 2e-16 ***
## PhenotypeLIGHT BROWN  0.217656   0.341217   0.638  0.52375    
## PhenotypeWhite       -1.439426   0.151814  -9.482  < 2e-16 ***
## Crate_IDCrate2        0.561053   0.193550   2.899  0.00386 ** 
## Crate_IDCrate3       -0.284023   0.269166  -1.055  0.29169    
## Crate_IDCrate4       -0.067226   0.337371  -0.199  0.84211    
## Crate_IDCrate5        0.484845   0.175304   2.766  0.00582 ** 
## Crate_IDCrate6       -0.309033   0.291899  -1.059  0.29009    
## Crate_IDCrate7       -0.703396   0.310029  -2.269  0.02357 *  
## Crate_IDCrate8        0.262489   0.365814   0.718  0.47327    
## SexMale                     NA         NA      NA       NA    
## Exp_groupTreatment   -0.126818   0.218510  -0.580  0.56184    
## Cage_ID04M           -0.431109   0.296289  -1.455  0.14609    
## Cage_ID05M           -0.714613   0.239672  -2.982  0.00296 ** 
## Cage_ID06M           -0.994507   0.222106  -4.478 8.76e-06 ***
## Cage_ID10F            0.518879   0.202433   2.563  0.01057 *  
## Cage_ID11F           -0.452999   0.235648  -1.922  0.05495 .  
## Cage_ID12F                  NA         NA      NA       NA    
## Cage_ID13F                  NA         NA      NA       NA    
## CrateCrate2                 NA         NA      NA       NA    
## CrateCrate3                 NA         NA      NA       NA    
## CrateCrate4                 NA         NA      NA       NA    
## CrateCrate5                 NA         NA      NA       NA    
## CrateCrate6                 NA         NA      NA       NA    
## CrateCrate7                 NA         NA      NA       NA    
## CrateCrate8                 NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.223 on 727 degrees of freedom
##   (6 observations deleted due to missingness)
## Multiple R-squared:  0.2183, Adjusted R-squared:  0.1903 
## F-statistic: 7.807 on 26 and 727 DF,  p-value: < 2.2e-16
plot(lm_result_all)

### glm (Generalized Linear Models) using the poisson model
#ALL 80 MICE
#Because the Mice IDs are not of relevance here, we exclude the Name column, but test all other variables
glm_result <- glm(Wins_per_rep ~ .-Name,data=tube_test_80,family = poisson())
summary(glm_result)
## 
## Call:
## glm(formula = Wins_per_rep ~ . - Name, family = poisson(), data = tube_test_80)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -2.42271  -0.77571  -0.08021   0.65960   2.10534  
## 
## Coefficients: (10 not defined because of singularities)
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           0.985206   0.250896   3.927 8.61e-05 ***
## TreatmentsHEAT        0.009598   0.064018   0.150 0.880826    
## TreatmentsCOLD        0.026547   0.063737   0.417 0.677035    
## TreatmentsDIET        0.019482   0.064033   0.304 0.760933    
## TreatmentsANTI       -0.057376   0.066122  -0.868 0.385540    
## TreatmentsFMT        -0.063008   0.076455  -0.824 0.409875    
## ReplicatesR2          0.013072   0.051132   0.256 0.798218    
## ReplicatesR3          0.002011   0.050561   0.040 0.968277    
## ReplicatesR4          0.024022   0.081529   0.295 0.768262    
## ReplicatesR5          0.013077   0.118303   0.111 0.911981    
## Arrive_weight         0.001826   0.010085   0.181 0.856304    
## PhenotypeBrown       -0.542872   0.060904  -8.914  < 2e-16 ***
## Phenotypecaramel      0.124981   0.177132   0.706 0.480448    
## Phenotypedark brown   0.445839   0.162741   2.740 0.006152 ** 
## PhenotypeLIGHT BROWN -0.220393   0.123177  -1.789 0.073577 .  
## PhenotypeWhite       -0.538552   0.064836  -8.306  < 2e-16 ***
## Crate_IDCrate2       -0.202961   0.084941  -2.389 0.016875 *  
## Crate_IDCrate3       -0.177914   0.093150  -1.910 0.056136 .  
## Crate_IDCrate4       -0.051602   0.149047  -0.346 0.729180    
## Crate_IDCrate5        0.110287   0.084200   1.310 0.190259    
## Crate_IDCrate6        0.103837   0.141628   0.733 0.463459    
## Crate_IDCrate7       -0.254371   0.149675  -1.699 0.089228 .  
## Crate_IDCrate8        0.310743   0.149235   2.082 0.037321 *  
## SexMale                     NA         NA      NA       NA    
## Exp_groupTreatment   -0.213216   0.122260  -1.744 0.081167 .  
## Cage_ID02M            0.403339   0.117941   3.420 0.000627 ***
## Cage_ID03M            0.352764   0.131309   2.687 0.007220 ** 
## Cage_ID04M            0.089943   0.174109   0.517 0.605445    
## Cage_ID05M            0.078818   0.121094   0.651 0.515120    
## Cage_ID06M           -0.071373   0.121449  -0.588 0.556747    
## Cage_ID07M            0.024071   0.125045   0.192 0.847354    
## Cage_ID08M            0.449968   0.132245   3.403 0.000668 ***
## Cage_ID09F           -0.198612   0.137774  -1.442 0.149422    
## Cage_ID10F            0.451013   0.124131   3.633 0.000280 ***
## Cage_ID11F            0.005785   0.124976   0.046 0.963078    
## Cage_ID12F            0.209845   0.125971   1.666 0.095748 .  
## Cage_ID13F                  NA         NA      NA       NA    
## Cage_ID14F            0.298496   0.123355   2.420 0.015528 *  
## Cage_ID15F            0.480633   0.127573   3.767 0.000165 ***
## Cage_ID16F                  NA         NA      NA       NA    
## CrateCrate2                 NA         NA      NA       NA    
## CrateCrate3                 NA         NA      NA       NA    
## CrateCrate4                 NA         NA      NA       NA    
## CrateCrate5                 NA         NA      NA       NA    
## CrateCrate6                 NA         NA      NA       NA    
## CrateCrate7                 NA         NA      NA       NA    
## CrateCrate8                 NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 1787.1  on 1486  degrees of freedom
## Residual deviance: 1560.5  on 1450  degrees of freedom
##   (33 observations deleted due to missingness)
## AIC: 4872.6
## 
## Number of Fisher Scoring iterations: 5
plot(glm_result)

#SPECIFIC 40 MICE ONLY
glm_result <- glm(Wins_per_rep ~ .-Name,data=tube_test_40,family = poisson())
summary(glm_result)
## 
## Call:
## glm(formula = Wins_per_rep ~ . - Name, family = poisson(), data = tube_test_40)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -2.19239  -0.77496  -0.03654   0.58023   2.04397  
## 
## Coefficients: (10 not defined because of singularities)
##                        Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           1.2880107  0.4207857   3.061 0.002206 ** 
## TreatmentsHEAT        0.0074473  0.0892944   0.083 0.933532    
## TreatmentsCOLD        0.0032720  0.0893920   0.037 0.970802    
## TreatmentsDIET        0.0074473  0.0892944   0.083 0.933532    
## TreatmentsANTI       -0.0361338  0.0907774  -0.398 0.690595    
## TreatmentsFMT        -0.0297379  0.1049425  -0.283 0.776891    
## ReplicatesR2         -0.0025349  0.0712018  -0.036 0.971601    
## ReplicatesR3         -0.0019785  0.0701821  -0.028 0.977510    
## ReplicatesR4         -0.0094148  0.1134252  -0.083 0.933848    
## ReplicatesR5          0.0009109  0.1610300   0.006 0.995486    
## Arrive_weight        -0.0036823  0.0164730  -0.224 0.823119    
## PhenotypeBrown       -0.6409863  0.0782605  -8.190 2.60e-16 ***
## PhenotypeLIGHT BROWN  0.0787051  0.1718906   0.458 0.647039    
## PhenotypeWhite       -0.7301891  0.0921219  -7.926 2.26e-15 ***
## Crate_IDCrate2        0.3247412  0.1136101   2.858 0.004258 ** 
## Crate_IDCrate3       -0.1621914  0.1753692  -0.925 0.355040    
## Crate_IDCrate4       -0.0275113  0.2087908  -0.132 0.895170    
## Crate_IDCrate5        0.2385385  0.1055724   2.259 0.023854 *  
## Crate_IDCrate6       -0.1391767  0.1749053  -0.796 0.426191    
## Crate_IDCrate7       -0.3895277  0.1966837  -1.980 0.047650 *  
## Crate_IDCrate8        0.1731069  0.2250214   0.769 0.441721    
## SexMale                      NA         NA      NA       NA    
## Exp_groupTreatment   -0.0607722  0.1359041  -0.447 0.654753    
## Cage_ID04M           -0.2291182  0.1803810  -1.270 0.204017    
## Cage_ID05M           -0.3984480  0.1445578  -2.756 0.005846 ** 
## Cage_ID06M           -0.4778446  0.1314431  -3.635 0.000278 ***
## Cage_ID10F            0.2945573  0.1264076   2.330 0.019795 *  
## Cage_ID11F           -0.2249200  0.1489704  -1.510 0.131087    
## Cage_ID12F                   NA         NA      NA       NA    
## Cage_ID13F                   NA         NA      NA       NA    
## CrateCrate2                  NA         NA      NA       NA    
## CrateCrate3                  NA         NA      NA       NA    
## CrateCrate4                  NA         NA      NA       NA    
## CrateCrate5                  NA         NA      NA       NA    
## CrateCrate6                  NA         NA      NA       NA    
## CrateCrate7                  NA         NA      NA       NA    
## CrateCrate8                  NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 906.16  on 753  degrees of freedom
## Residual deviance: 753.71  on 727  degrees of freedom
##   (6 observations deleted due to missingness)
## AIC: 2471.5
## 
## Number of Fisher Scoring iterations: 5
plot(glm_result)

#The glm model results of 80 vs. 40 mice differ


#However, other families may be a better fit for my data, and thus, I read about them using ?glm()
#I also used ChatGPT to help me understand the definition of each family 
#Turns out the Quasipoisson could fit my data better than poisson
#So to find out if this is the case, I have to explore my data
#If the  variance is larger than the mean in my dataset (=overdispersion), then quasipoisson is a better fit
#You only calculate the variance (sd) and mean of the dependent variable, which in my case is number of wins

#First testing this with all 80 mice
dependent_variable_wins <- tube_test_80$Wins_per_rep
mean_value <- mean(dependent_variable_wins,na.rm=TRUE)
sd_value <- sd(dependent_variable_wins,na.rm=TRUE)

coefficient_dispersion <- (sd_value / mean_value) * 100
coefficient_variation <- (sd_value / mean_value) * 100

print(coefficient_dispersion) #69.62755
## [1] 69.62755
print(coefficient_variation) #69.62755
## [1] 69.62755
#The results of the 40 specific mice (tube_test_40) were: 68.8126 and 68.8126
#In this case, the standard deviation (sd) = variance is equal to the mean, resulting in the coefficients being the same.
#So since my data is count data with no overdispersion, the Poisson family might be suitable indeed.
#When the standard deviation is equal to the mean, it implies that the data values are evenly distributed around the mean (which is clear when visualising the number of wins in a histogram). 
#This can occur when the data is symmetrically distributed or when there is a specific relationship between the values in your dataset. The latter is my data (0,1,2,3 or 4 wins)

#I looked at papers doing tube test analyses and one named "social hierarchy position in female mice is associated with plasma corticosterone levels and hypothalamic gene expression" (2019) by Williamson et al. uses a Poisson-lognormal distribution test.
#I want to see if the Poisson-lognormal distribution test fits my data better than regular poisson

#TEST ON SPECIFIC 40 MICE
# Fit Poisson regression model
#poisson_model <- glm(Wins_per_rep ~ . - Name, data = cleaned_tube_test_40, family = "poisson")
poisson_model <- glm(Wins_per_rep ~ Treatments + Cage_ID, data = cleaned_tube_test_40, family = "poisson")

# Fit Poisson-lognormal regression model with log-transformed response
#log_count_var <- log(cleaned_tube_test_40$Wins_per_rep)
#cleaned_tube_test_40$log_wins <- log_count_var
#head(cleaned_tube_test_40)
#poisson_lognormal_model <- glm(log_wins ~ . - Name, data = cleaned_tube_test_40, family = "gaussian") #does not work
#poisson_lognormal_model <- glm(log_wins ~ Treatments + Cage_ID, data = cleaned_tube_test_40, family = "gaussian") #does not work
#cleaned_tube_test_40 <- within(cleaned_tube_test_40, rm(log_wins))
#poisson_lognormal_model <- glm(log(Wins_per_rep) ~ . - Name, data = cleaned_tube_test_40, family = "gaussian") #does not work
#poisson_lognormal_model <- glm(log(Wins_per_rep) ~ Treatments + Cage_ID, data = cleaned_tube_test_40, family = "gaussian") #does not work